import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# Load the listings CSV into a DataFrame and preview the first rows.
# NOTE(review): the path is an absolute, machine-specific location.
listings_csv = 'D:/sql/Data set/Boston/listings_1.csv'
df = pd.read_csv(listings_csv)
df.head()
| id | listing_url | scrape_id | last_scraped | source | name | description | neighborhood_overview | picture_url | host_id | ... | review_scores_communication | review_scores_location | review_scores_value | license | instant_bookable | calculated_host_listings_count | calculated_host_listings_count_entire_homes | calculated_host_listings_count_private_rooms | calculated_host_listings_count_shared_rooms | reviews_per_month | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5.216497e+07 | https://www.airbnb.com/rooms/52164968 | 2.020000e+13 | 6/22/2023 | previous scrape | Home in Boston · ★4.90 · 1 bedroom · 1 bed · 1... | Whether you want to spend a weekend or a coupl... | We're located in a lovely, safe and quiet neig... | https://a0.muscache.com/pictures/miso/Hosting-... | 400176914 | ... | 4.94 | 4.74 | 4.81 | NaN | f | 2 | 1 | 1 | 0 | 1.45 |
| 1 | 5.107200e+07 | https://www.airbnb.com/rooms/51072005 | 2.020000e+13 | 6/22/2023 | city scrape | Place to stay in Boston · ★4.80 · 1 bedroom · ... | ------PERKS------<br />- PRIVATE DRIVEWAY<br /... | We're located in a lovely, safe and quiet neig... | https://a0.muscache.com/pictures/miso/Hosting-... | 400176914 | ... | 4.89 | 4.72 | 4.63 | STR-458877 | f | 2 | 1 | 1 | 0 | 7.03 |
| 2 | 3.344684e+07 | https://www.airbnb.com/rooms/33446838 | 2.020000e+13 | 6/21/2023 | city scrape | Home in Boston · 1 bedroom · 1 bed · 1 shared ... | 3 min walk to the T Blue Line Orient Heights 3... | Safe; no Crime; Great professional families | https://a0.muscache.com/pictures/96d0afc0-bf64... | 22402141 | ... | 5.00 | 5.00 | 5.00 | NaN | f | 1 | 0 | 1 | 0 | 0.02 |
| 3 | 1.861231e+07 | https://www.airbnb.com/rooms/18612312 | 2.020000e+13 | 6/22/2023 | city scrape | Home in Boston · 1 bedroom · 1 bed · 1 bath | Apartment located in East Boston. Renovated Ki... | NaN | https://a0.muscache.com/pictures/8e8f232b-eade... | 129278480 | ... | NaN | NaN | NaN | NaN | f | 5 | 0 | 5 | 0 | NaN |
| 4 | 7.990000e+17 | https://www.airbnb.com/rooms/798533622805798105 | 2.020000e+13 | 6/22/2023 | city scrape | Rental unit in Boston · ★4.57 · 1 bedroom · 1 ... | Peaceful and comfortable studio located in Eas... | NaN | https://a0.muscache.com/pictures/miso/Hosting-... | 193540040 | ... | 4.93 | 4.57 | 4.36 | STR-518875 | t | 2 | 2 | 0 | 0 | 3.21 |
5 rows × 75 columns
# Print dtype, non-null count and memory usage for every column of the raw frame.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3973 entries, 0 to 3972 Data columns (total 75 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 3973 non-null float64 1 listing_url 3973 non-null object 2 scrape_id 3973 non-null float64 3 last_scraped 3973 non-null object 4 source 3973 non-null object 5 name 3973 non-null object 6 description 3954 non-null object 7 neighborhood_overview 2571 non-null object 8 picture_url 3973 non-null object 9 host_id 3973 non-null int64 10 host_url 3973 non-null object 11 host_name 3973 non-null object 12 host_since 3973 non-null object 13 host_location 3168 non-null object 14 host_about 2794 non-null object 15 host_response_time 3492 non-null object 16 host_response_rate 3492 non-null object 17 host_acceptance_rate 3484 non-null object 18 host_is_superhost 2670 non-null object 19 host_thumbnail_url 3973 non-null object 20 host_picture_url 3973 non-null object 21 host_neighbourhood 3852 non-null object 22 host_listings_count 3973 non-null int64 23 host_total_listings_count 3973 non-null int64 24 host_verifications 3973 non-null object 25 host_has_profile_pic 3973 non-null object 26 host_identity_verified 3973 non-null object 27 neighbourhood 2571 non-null object 28 neighbourhood_cleansed 3973 non-null object 29 neighbourhood_group_cleansed 0 non-null float64 30 latitude 3973 non-null float64 31 longitude 3973 non-null float64 32 property_type 3973 non-null object 33 room_type 3973 non-null object 34 accommodates 3973 non-null int64 35 bathrooms 0 non-null float64 36 bathrooms_text 3972 non-null object 37 bedrooms 2417 non-null float64 38 beds 3900 non-null float64 39 amenities 3973 non-null object 40 price 3973 non-null object 41 minimum_nights 3973 non-null int64 42 maximum_nights 3973 non-null int64 43 minimum_minimum_nights 3973 non-null int64 44 maximum_minimum_nights 3973 non-null int64 45 minimum_maximum_nights 3973 non-null int64 46 maximum_maximum_nights 3973 non-null int64 47 
minimum_nights_avg_ntm 3973 non-null float64 48 maximum_nights_avg_ntm 3973 non-null float64 49 calendar_updated 0 non-null float64 50 has_availability 3973 non-null object 51 availability_30 3973 non-null int64 52 availability_60 3973 non-null int64 53 availability_90 3973 non-null int64 54 availability_365 3973 non-null int64 55 calendar_last_scraped 3973 non-null object 56 number_of_reviews 3973 non-null int64 57 number_of_reviews_ltm 3973 non-null int64 58 number_of_reviews_l30d 3973 non-null int64 59 first_review 2911 non-null object 60 last_review 2911 non-null object 61 review_scores_rating 2911 non-null float64 62 review_scores_accuracy 2903 non-null float64 63 review_scores_cleanliness 2904 non-null float64 64 review_scores_checkin 2902 non-null float64 65 review_scores_communication 2904 non-null float64 66 review_scores_location 2902 non-null float64 67 review_scores_value 2902 non-null float64 68 license 1888 non-null object 69 instant_bookable 3973 non-null object 70 calculated_host_listings_count 3973 non-null int64 71 calculated_host_listings_count_entire_homes 3973 non-null int64 72 calculated_host_listings_count_private_rooms 3973 non-null int64 73 calculated_host_listings_count_shared_rooms 3973 non-null int64 74 reviews_per_month 2911 non-null float64 dtypes: float64(19), int64(21), object(35) memory usage: 2.3+ MB
# Number of distinct values per column; NaN is counted as a value, matching
# what len(col.unique()) reports.
df.nunique(dropna=False).to_frame("Unique Values Count")
| Unique Values Count | |
|---|---|
| id | 2847 |
| listing_url | 3973 |
| scrape_id | 1 |
| last_scraped | 2 |
| source | 2 |
| ... | ... |
| calculated_host_listings_count | 36 |
| calculated_host_listings_count_entire_homes | 33 |
| calculated_host_listings_count_private_rooms | 21 |
| calculated_host_listings_count_shared_rooms | 4 |
| reviews_per_month | 627 |
75 rows × 1 columns
# Names of the object-dtype (text) columns, as a plain list.
cat_df = list(df.select_dtypes(include='object').columns)
cat_df
['listing_url', 'last_scraped', 'source', 'name', 'description', 'neighborhood_overview', 'picture_url', 'host_url', 'host_name', 'host_since', 'host_location', 'host_about', 'host_response_time', 'host_response_rate', 'host_acceptance_rate', 'host_is_superhost', 'host_thumbnail_url', 'host_picture_url', 'host_neighbourhood', 'host_verifications', 'host_has_profile_pic', 'host_identity_verified', 'neighbourhood', 'neighbourhood_cleansed', 'property_type', 'room_type', 'bathrooms_text', 'amenities', 'price', 'has_availability', 'calendar_last_scraped', 'first_review', 'last_review', 'license', 'instant_bookable']
# Sub-frame holding only the columns named in cat_df; preview its first rows.
df_object=df.loc[:,cat_df]
df_object.head()
| listing_url | last_scraped | source | name | description | neighborhood_overview | picture_url | host_url | host_name | host_since | ... | room_type | bathrooms_text | amenities | price | has_availability | calendar_last_scraped | first_review | last_review | license | instant_bookable | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | https://www.airbnb.com/rooms/52164968 | 6/22/2023 | previous scrape | Home in Boston · ★4.90 · 1 bedroom · 1 bed · 1... | Whether you want to spend a weekend or a coupl... | We're located in a lovely, safe and quiet neig... | https://a0.muscache.com/pictures/miso/Hosting-... | https://www.airbnb.com/users/show/400176914 | TuanAnh | 5/5/2021 | ... | Private room | 1 private bath | ["Body soap", "Bathtub", "Hair dryer", "Smoke ... | $130.00 | t | 6/22/2023 | 9/19/2021 | 7/4/2022 | NaN | f |
| 1 | https://www.airbnb.com/rooms/51072005 | 6/22/2023 | city scrape | Place to stay in Boston · ★4.80 · 1 bedroom · ... | ------PERKS------<br />- PRIVATE DRIVEWAY<br /... | We're located in a lovely, safe and quiet neig... | https://a0.muscache.com/pictures/miso/Hosting-... | https://www.airbnb.com/users/show/400176914 | TuanAnh | 5/5/2021 | ... | Entire home/apt | 1 bath | ["Cooking basics", "Hair dryer", "Smoke alarm"... | $150.00 | t | 6/22/2023 | 9/12/2021 | 6/8/2023 | STR-458877 | f |
| 2 | https://www.airbnb.com/rooms/33446838 | 6/21/2023 | city scrape | Home in Boston · 1 bedroom · 1 bed · 1 shared ... | 3 min walk to the T Blue Line Orient Heights 3... | Safe; no Crime; Great professional families | https://a0.muscache.com/pictures/96d0afc0-bf64... | https://www.airbnb.com/users/show/22402141 | Mirian | 10/11/2014 | ... | Private room | 1 shared bath | ["Indoor fireplace", "Cooking basics", "Washer... | $60.00 | t | 6/21/2023 | 8/18/2019 | 8/18/2019 | NaN | f |
| 3 | https://www.airbnb.com/rooms/18612312 | 6/22/2023 | city scrape | Home in Boston · 1 bedroom · 1 bed · 1 bath | Apartment located in East Boston. Renovated Ki... | NaN | https://a0.muscache.com/pictures/8e8f232b-eade... | https://www.airbnb.com/users/show/129278480 | Jay | 5/7/2017 | ... | Private room | 1 bath | ["Washer", "Hair dryer", "Smoke alarm", "Heati... | $50.00 | t | 6/22/2023 | NaN | NaN | NaN | f |
| 4 | https://www.airbnb.com/rooms/798533622805798105 | 6/22/2023 | city scrape | Rental unit in Boston · ★4.57 · 1 bedroom · 1 ... | Peaceful and comfortable studio located in Eas... | NaN | https://a0.muscache.com/pictures/miso/Hosting-... | https://www.airbnb.com/users/show/193540040 | Luciano | 6/4/2018 | ... | Entire home/apt | 1 bath | ["Body soap", "Free street parking", "Essentia... | $140.00 | t | 6/22/2023 | 2/12/2023 | 6/19/2023 | STR-518875 | t |
5 rows × 35 columns
# Names of the float64/int64 columns, then the sub-frame made of just those.
num_df = list(df.select_dtypes(include=['float64', 'int64']).columns)
df_number = df.loc[:, num_df]
df_number.head()
| id | scrape_id | host_id | host_listings_count | host_total_listings_count | neighbourhood_group_cleansed | latitude | longitude | accommodates | bathrooms | ... | review_scores_cleanliness | review_scores_checkin | review_scores_communication | review_scores_location | review_scores_value | calculated_host_listings_count | calculated_host_listings_count_entire_homes | calculated_host_listings_count_private_rooms | calculated_host_listings_count_shared_rooms | reviews_per_month | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5.216497e+07 | 2.020000e+13 | 400176914 | 2 | 2 | NaN | 42.392280 | -71.006760 | 2 | NaN | ... | 4.97 | 5.00 | 4.94 | 4.74 | 4.81 | 2 | 1 | 1 | 0 | 1.45 |
| 1 | 5.107200e+07 | 2.020000e+13 | 400176914 | 2 | 2 | NaN | 42.391317 | -71.007146 | 4 | NaN | ... | 4.84 | 4.95 | 4.89 | 4.72 | 4.63 | 2 | 1 | 1 | 0 | 7.03 |
| 2 | 3.344684e+07 | 2.020000e+13 | 22402141 | 1 | 3 | NaN | 42.390720 | -71.004560 | 1 | NaN | ... | 5.00 | 5.00 | 5.00 | 5.00 | 5.00 | 1 | 0 | 1 | 0 | 0.02 |
| 3 | 1.861231e+07 | 2.020000e+13 | 129278480 | 5 | 7 | NaN | 42.390310 | -71.001780 | 1 | NaN | ... | NaN | NaN | NaN | NaN | NaN | 5 | 0 | 5 | 0 | NaN |
| 4 | 7.990000e+17 | 2.020000e+13 | 193540040 | 2 | 2 | NaN | 42.390136 | -71.000661 | 2 | NaN | ... | 5.00 | 4.86 | 4.93 | 4.57 | 4.36 | 2 | 2 | 0 | 0 | 3.21 |
5 rows × 40 columns
# Text and numeric columns kept for the analysis.
text_cols = [
    'host_location', 'host_response_rate', 'host_acceptance_rate',
    'has_availability', 'host_neighbourhood', 'instant_bookable',
    'neighbourhood_cleansed', 'room_type', 'price',
]
numeric_cols = ['id', 'bedrooms', 'accommodates', 'number_of_reviews_l30d']
dc = df_object.loc[:, text_cols]
dn = df_number.loc[:, numeric_cols]

# Replace missing host location / neighbourhood with the most frequent value.
for mode_col in ('host_location', 'host_neighbourhood'):
    dc[mode_col] = dc[mode_col].fillna(dc[mode_col].mode()[0])

# Drop the first character (the currency sign in values such as "$130.00")
# and the thousands separators, then convert the text to float.
dc['price'] = dc['price'].str[1:].str.replace(',', '', regex=False).astype(float)
dc['price']
0 130.0
1 150.0
2 60.0
3 50.0
4 140.0
...
3968 140.0
3969 80.0
3970 28.0
3971 110.0
3972 115.0
Name: price, Length: 3973, dtype: float64
# Convert the percentage strings (e.g. "95%") to integers by dropping the last
# character.  Missing rates are filled with the placeholder '000', which turns
# into 0 after the trim, so a rate of 0 also stands for "not reported".
for rate_col in ('host_response_rate', 'host_acceptance_rate'):
    dc[rate_col] = dc[rate_col].fillna('000').str[:-1].astype('int64')

# Fill missing bedroom counts with half the guest capacity (rounded).  The
# result is assigned back instead of calling fillna(..., inplace=True) on the
# selected column: that chained in-place call acts on a temporary under pandas
# copy-on-write and would leave `dn` unchanged.
dn['bedrooms'] = dn['bedrooms'].fillna((dn['accommodates'] / 2).round())
# Treat a bedroom count of 0 as 1.
dn['bedrooms'] = dn['bedrooms'].replace(0, 1)
dn['bedrooms']
0 1.0
1 1.0
2 1.0
3 1.0
4 1.0
...
3968 2.0
3969 1.0
3970 1.0
3971 2.0
3972 1.0
Name: bedrooms, Length: 3973, dtype: float64
# Put the numeric and text features side by side, aligned on the row index.
final_df = dn.join(dc)

# Encode the 't' / 'f' flags as 1 / 0.
d = {'t': 1, 'f': 0}
for flag_col in ('has_availability', 'instant_bookable'):
    final_df[flag_col] = final_df[flag_col].map(d)
final_df['instant_bookable']
0 0
1 0
2 0
3 0
4 1
..
3968 0
3969 0
3970 0
3971 0
3972 0
Name: instant_bookable, Length: 3973, dtype: int64
# Preview the first rows of the combined feature table.
final_df.head()
| id | bedrooms | accommodates | number_of_reviews_l30d | host_location | host_response_rate | host_acceptance_rate | has_availability | host_neighbourhood | instant_bookable | neighbourhood_cleansed | room_type | price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5.216497e+07 | 1.0 | 2 | 0 | Boston, MA | 0 | 100 | 1 | Harbor View / Orient Heights | 0 | East Boston | Private room | 130.0 |
| 1 | 5.107200e+07 | 1.0 | 4 | 7 | Boston, MA | 0 | 100 | 1 | Harbor View / Orient Heights | 0 | East Boston | Entire home/apt | 150.0 |
| 2 | 3.344684e+07 | 1.0 | 1 | 0 | Boston, MA | 100 | 100 | 1 | Cambridge | 0 | East Boston | Private room | 60.0 |
| 3 | 1.861231e+07 | 1.0 | 1 | 0 | Boston, MA | 100 | 100 | 1 | East Boston | 0 | East Boston | Private room | 50.0 |
| 4 | 7.990000e+17 | 1.0 | 2 | 6 | Boston, MA | 100 | 99 | 1 | Harbor View / Orient Heights | 1 | East Boston | Entire home/apt | 140.0 |
# Count the remaining missing values in each column.
final_df.isna().sum()
id 0 bedrooms 0 accommodates 0 number_of_reviews_l30d 0 host_location 0 host_response_rate 0 host_acceptance_rate 0 has_availability 0 host_neighbourhood 0 instant_bookable 0 neighbourhood_cleansed 0 room_type 0 price 0 dtype: int64
# Summary statistics of the numeric columns, transposed to one row per column.
final_df.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| id | 3973.0 | 2.745824e+17 | 3.643551e+17 | 3781.0 | 26108847.0 | 48905210.0 | 6.850000e+17 | 9.190000e+17 |
| bedrooms | 3973.0 | 1.520765e+00 | 9.114270e-01 | 1.0 | 1.0 | 1.0 | 2.000000e+00 | 1.300000e+01 |
| accommodates | 3973.0 | 3.218978e+00 | 2.227805e+00 | 1.0 | 2.0 | 2.0 | 4.000000e+00 | 1.600000e+01 |
| number_of_reviews_l30d | 3973.0 | 1.129625e+00 | 2.047783e+00 | 0.0 | 0.0 | 0.0 | 2.000000e+00 | 1.600000e+01 |
| host_response_rate | 3973.0 | 8.501007e+01 | 3.309416e+01 | 0.0 | 94.0 | 100.0 | 1.000000e+02 | 1.000000e+02 |
| host_acceptance_rate | 3973.0 | 7.793179e+01 | 3.508290e+01 | 0.0 | 69.0 | 97.0 | 1.000000e+02 | 1.000000e+02 |
| has_availability | 3973.0 | 9.554493e-01 | 2.063411e-01 | 0.0 | 1.0 | 1.0 | 1.000000e+00 | 1.000000e+00 |
| instant_bookable | 3973.0 | 2.479235e-01 | 4.318615e-01 | 0.0 | 0.0 | 0.0 | 0.000000e+00 | 1.000000e+00 |
| price | 3973.0 | 2.283088e+02 | 2.679322e+02 | 0.0 | 100.0 | 175.0 | 2.750000e+02 | 1.000000e+04 |
# Count / unique / top / freq for the object-dtype columns, one row per column.
final_df.describe(include='object').T
| count | unique | top | freq | |
|---|---|---|---|---|
| host_location | 3973 | 121 | Boston, MA | 3056 |
| host_neighbourhood | 3973 | 111 | Cambridge | 563 |
| neighbourhood_cleansed | 3973 | 25 | Dorchester | 507 |
| room_type | 3973 | 4 | Entire home/apt | 2628 |
# Number of distinct values per column of the feature table; NaN is counted
# as a value, matching what len(col.unique()) reports.
final_df.nunique(dropna=False).to_frame("Unique Values Count")
| Unique Values Count | |
|---|---|
| id | 2847 |
| bedrooms | 9 |
| accommodates | 16 |
| number_of_reviews_l30d | 16 |
| host_location | 121 |
| host_response_rate | 37 |
| host_acceptance_rate | 65 |
| has_availability | 2 |
| host_neighbourhood | 111 |
| instant_bookable | 2 |
| neighbourhood_cleansed | 25 |
| room_type | 4 |
| price | 621 |
# Pairwise correlation of the numeric features.  The numeric columns are
# selected explicitly because final_df also holds text columns, and
# DataFrame.corr() raises on non-numeric data in pandas >= 2.0 instead of
# silently dropping it.
final_df.select_dtypes(include='number').corr()
| id | bedrooms | accommodates | number_of_reviews_l30d | host_response_rate | host_acceptance_rate | has_availability | instant_bookable | price | |
|---|---|---|---|---|---|---|---|---|---|
| id | 1.000000 | 0.087348 | 0.080384 | -0.009583 | 0.210719 | 0.174509 | 0.154715 | 0.063821 | 0.070566 |
| bedrooms | 0.087348 | 1.000000 | 0.854293 | 0.069038 | 0.011053 | 0.061651 | 0.044412 | 0.047362 | 0.416448 |
| accommodates | 0.080384 | 0.854293 | 1.000000 | 0.114910 | 0.033367 | 0.123876 | 0.056827 | 0.127256 | 0.435901 |
| number_of_reviews_l30d | -0.009583 | 0.069038 | 0.114910 | 1.000000 | 0.213164 | 0.288548 | 0.116749 | 0.034822 | 0.027294 |
| host_response_rate | 0.210719 | 0.011053 | 0.033367 | 0.213164 | 1.000000 | 0.809663 | 0.535689 | 0.057904 | 0.028932 |
| host_acceptance_rate | 0.174509 | 0.061651 | 0.123876 | 0.288548 | 0.809663 | 1.000000 | 0.467106 | 0.131609 | 0.107997 |
| has_availability | 0.154715 | 0.044412 | 0.056827 | 0.116749 | 0.535689 | 0.467106 | 1.000000 | 0.123980 | 0.050851 |
| instant_bookable | 0.063821 | 0.047362 | 0.127256 | 0.034822 | 0.057904 | 0.131609 | 0.123980 | 1.000000 | 0.133159 |
| price | 0.070566 | 0.416448 | 0.435901 | 0.027294 | 0.028932 | 0.107997 | 0.050851 | 0.133159 | 1.000000 |
# Bucket price into 100-wide bands.  The first edge is -1 so that a price of 0
# falls into the first band; everything above 900 goes to '900+'.
price_edges = [-1, *range(100, 1000, 100), np.inf]
price_labels = ['<100.0', *(f'{lo}-{lo + 100}' for lo in range(100, 900, 100)), '900+']
final_df['price_Group'] = pd.cut(final_df['price'], bins=price_edges, labels=price_labels)
# Number of rows that did not fall into any band.
final_df['price_Group'].isna().sum()
0
# Bucket the host response rate into 10-point bands ('<10' ... '90+'); the
# first edge is -1 so that a rate of 0 falls into the first band.
response_edges = [-1, *range(10, 100, 10), np.inf]
response_labels = ['<10', *(f'{lo}-{lo + 10}' for lo in range(10, 90, 10)), '90+']
final_df['response_Group'] = pd.cut(
    final_df['host_response_rate'], bins=response_edges, labels=response_labels
)
# Number of rows that did not fall into any band.
final_df['response_Group'].isna().sum()
0
# Bucket the host acceptance rate into 10-point bands ('<10' ... '90+'); the
# first edge is -1 so that a rate of 0 falls into the first band.
accept_edges = [-1, *range(10, 100, 10), np.inf]
accept_labels = ['<10', *(f'{lo}-{lo + 10}' for lo in range(10, 90, 10)), '90+']
final_df['accept_Group'] = pd.cut(
    final_df['host_acceptance_rate'], bins=accept_edges, labels=accept_labels
)
# Number of rows that did not fall into any band.
final_df['accept_Group'].isna().sum()
0
# One histogram panel per categorical / discrete feature, laid out in a row.
panel_cols = ['has_availability', 'instant_bookable', 'room_type', 'bedrooms']
fig, axes = plt.subplots(1, len(panel_cols), figsize=(16, 5))
for ax, col in zip(axes, panel_cols):
    sns.histplot(data=final_df[col], ax=ax)

# Price histogram (15 bins over 0-1000) on a figure of its own.  The figure is
# opened before plotting: calling plt.figure() after plt.hist only creates an
# empty figure and leaves the histogram on whichever axes was current.
plt.figure()
plt.hist(final_df['price'], bins=15, range=[0, 1000])
plt.show()
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
# Share of each has_availability value, as a pie chart with percentage labels.
final_df['has_availability'].value_counts().plot(kind='pie', autopct='%1.1f%%')
<AxesSubplot:ylabel='has_availability'>
# Share of each instant_bookable value, as a pie chart with percentage labels.
final_df['instant_bookable'].value_counts().plot(kind='pie', autopct='%1.1f%%')
<AxesSubplot:ylabel='instant_bookable'>
# Number of listings per room type, as a bar chart.
final_df['room_type'].value_counts().plot.bar()
<AxesSubplot:>
# Number of listings per bedroom count, as a bar chart.
final_df['bedrooms'].value_counts().plot.bar()
<AxesSubplot:>
# Number of listings per accommodates value, as a bar chart.
final_df['accommodates'].value_counts().plot.bar()
<AxesSubplot:>
# Listing counts per price band, split by one feature at a time.
# Each entry is (hue column, figure height); only the bedrooms chart is taller.
hue_panels = [
    ('room_type', 6),
    ('bedrooms', 7),
    ('accept_Group', 6),
    ('response_Group', 6),
    ('has_availability', 6),
    ('instant_bookable', 6),
]
for hue_col, fig_height in hue_panels:
    plt.figure(figsize=(10, fig_height))
    sns.countplot(x='price_Group', hue=hue_col, data=final_df)
    plt.xticks(rotation='vertical')
    plt.legend(loc='upper right')
    plt.show()
# Convert the raw price text on the full frame to float: drop the first
# character (the currency sign in values such as "$130.00") and the thousands
# separators before casting.
df['price'] = df['price'].str[1:].str.replace(',', '', regex=False).astype(float)
# Number of prices that are missing after the conversion.
df['price'].isna().sum()
0
# Price band used to colour the map markers.  pd.cut intervals are open on the
# left, so the first edge is -1 rather than 0: with an edge at 0, a price of
# exactly 0 matches no band and would need to be back-filled with an arbitrary
# label.  These are the same edges and labels as final_df['price_Group'].
df['colur'] = pd.cut(
    df['price'],
    bins=[-1, 100, 200, 300, 400, 500, 600, 700, 800, 900, np.inf],
    labels=['<100.0', '100-200', '200-300', '300-400', '400-500',
            '500-600', '600-700', '700-800', '800-900', '900+'],
)
# Number of rows left without a band.
df['colur'].isna().sum()
0
# Number of listings in each price band.
df['colur'].value_counts()
100-200 1291 <100.0 1034 200-300 846 300-400 365 400-500 168 500-600 96 900+ 70 600-700 48 800-900 29 700-800 26 Name: colur, dtype: int64
import plotly.express as px

# Constant helper column.
# NOTE(review): 'size' is not passed to scatter_mapbox below, so it has no
# effect on the markers as written; confirm whether size='size' was intended.
df['size'] = 22

# Plot every listing at its coordinates, coloured and labelled by price band.
fig = px.scatter_mapbox(
    df,
    lat='latitude',
    lon='longitude',
    zoom=9,
    size_max=1,  # manually set largest size
    color='colur',
    hover_name="colur",
)
# Select the "open-street-map" base map style, then render the figure.
fig.update_layout(mapbox_style="open-street-map")
fig.show()